# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# Created by:   Pan Yanzhen
# Name:         demo10
# Description:
# Author:       98745
# Date:         2019/3/17
#-------------------------------------------------------------------------------

from urllib import request
import re
from bs4 import BeautifulSoup
from time import ctime,sleep
import threading
import os,sys,io
# Directory where each crawled page is saved as "<title>.html".
os.makedirs('urls',exist_ok=True)
# Shared work queue of pending URLs, seeded with the start page.
# Mutated concurrently by all worker threads (no lock — see geturl).
URL=['https://geekori.com']
# URLs already taken off the queue (processed or errored) by the workers.
delurl=[]
def geturl():
    """Worker loop: pop the head of the shared URL queue, save its page, and
    queue any absolute links found on it.

    Runs forever; intended to be executed by several daemon threads.
    Reads/writes the module-level ``URL`` (pending queue) and ``delurl``
    (already-processed list).

    NOTE(review): the shared lists are mutated without a lock, so two workers
    can race between the emptiness check and ``URL[0]`` — confirm whether a
    ``threading.Lock`` should guard the queue operations.
    """
    global URL
    global delurl
    while True:
        try:
            if URL:
                current = URL[0]
                html = request.urlopen(current).read()
                soup = BeautifulSoup(html, 'lxml')
                title = soup.find(name='title').get_text().replace('\n', '')
                # `with` guarantees the file is closed even if write() raises.
                with open('./urls/' + str(title) + '.html', 'w',
                          encoding='utf-8') as fp:
                    fp.write(html.decode('utf-8'))
                for anchor in soup.find_all(name='a'):
                    href = anchor.get('href')
                    # Queue only absolute http(s) links not already pending
                    # AND not already crawled — the original checked only
                    # URL, so finished pages were re-queued forever.
                    if (str(href)[:4] == 'http'
                            and href not in URL and href not in delurl):
                        URL.append(href)
                        print(href)
                delurl.append(URL[0])
                del URL[0]
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit).  Guard the cleanup: another
            # worker may have drained the queue before we got here, in
            # which case URL[0] would itself raise.
            if URL:
                delurl.append(URL[0])
                del URL[0]
            continue
        sleep(2)

# Spawn three identical crawler workers that share the global URL queue.
threads = [threading.Thread(target=geturl) for _ in range(3)]

if __name__ == '__main__':
    # Start all workers as daemons so they don't block interpreter exit.
    for t in threads:
        # Thread.setDaemon() is deprecated (removed in Python 3.13);
        # assigning the `daemon` attribute is the supported spelling.
        t.daemon = True
        t.start()
    # NOTE(review): geturl() loops forever, so these joins never return —
    # the daemon flag only takes effect if this join loop is interrupted.
    for worker in threads:
        worker.join()